// Thread-group dimensions used by the [numthreads] attribute on CSMain.
// The build may predefine either macro; `#if !NAME` is true both when the
// macro is undefined and when it is defined as 0, so in both cases the
// defaults below apply.
#if !BLOCK_X
    #define BLOCK_X 16
#endif
// BLOCK_Y falls back to BLOCK_X, giving a square thread group by default.
#if !BLOCK_Y
    #define BLOCK_Y BLOCK_X
#endif

// Shader constants bound at b0. Member order and types define the buffer
// layout, so they must stay in sync with whatever fills this buffer.
cbuffer CB : register(b0)
{
    // Source Y-plane size in texels; (SrcWidth - 1, SrcHeight - 1) is used as
    // the largest valid source coordinate.
    uint  SrcWidth;
    uint  SrcHeight;
    // Destination size. Not referenced by CSMain in this file.
    uint  DstWidth;
    uint  DstHeight;
    // Region-of-interest origin as a fraction of (SrcWidth - 1, SrcHeight - 1);
    // the product is truncated to an integer texel offset.
    // NOTE(review): presumably expected to be in [0, 1] - confirm with the caller.
    float RoiX;
    float RoiY;
};

// Resource bindings: writable destination planes (u0/u1) and read-only source
// planes (t0/t1). The Y plane is single-channel and the UV plane is
// two-channel; CSMain addresses the UV plane at half the Y-plane coordinates.
RWTexture2D<float>  outputTextureY : register(u0);
RWTexture2D<float2> outputTextureUV : register(u1);
Texture2D<float>    inputTextureY : register(t0);
Texture2D<float2>   inputTextureUV : register(t1);
// Alternative integer-typed declarations of the same bindings, currently disabled:
// RWTexture2D<uint>  outputTextureY : register(u0);
// RWTexture2D<uint2> outputTextureUV : register(u1);
// Texture2D<uint>    inputTextureY : register(t0);
// Texture2D<uint2>   inputTextureUV : register(t1);

// Copies a region of the source Y/UV planes into the destination planes.
//
// Each thread handles one destination Y texel at DTid.xy. The matching source
// texel is the destination coordinate shifted by the ROI origin
// (RoiX * (SrcWidth - 1), RoiY * (SrcHeight - 1)), truncated to whole texels
// and clamped to the last valid source texel. Threads whose destination
// coordinate is even in both x and y also copy one UV texel, addressed at half
// the Y coordinates in both source and destination.
//
// Gid, GTid and GI are part of the entry-point signature but are not used.
//
// NOTE(review): there is no explicit check against DstWidth/DstHeight, so
// threads beyond the destination extent still issue writes - confirm the
// dispatch size / out-of-range write behaviour is what the caller expects.
[numthreads(BLOCK_X, BLOCK_Y, 1)] void CSMain(uint3 Gid : SV_GroupID,
                                              uint3 DTid : SV_DispatchThreadID,
                                              uint3 GTid : SV_GroupThreadID,
                                              uint  GI : SV_GroupIndex) {
    const uint2 Position = DTid.xy;

    // Largest valid texel coordinate in the source Y plane.
    const uint2 SrcMax = uint2(SrcWidth - 1, SrcHeight - 1);

    // ROI origin in source texels; the float product is truncated to uint.
    const uint2 LowerBoundary_xy = uint2(uint(RoiX * SrcMax.x),
                                         uint(RoiY * SrcMax.y));

    // clamp(x, min, max): the value being clamped is the FIRST argument.
    const uint2 SrcPosition = clamp(Position + LowerBoundary_xy,
                                    uint2(0, 0),
                                    SrcMax);

    outputTextureY[Position] = inputTextureY[SrcPosition];

    // One UV texel per 2x2 block of Y texels: only the thread at the even/even
    // corner of each block performs the UV copy.
    if (Position.x % 2 == 0 && Position.y % 2 == 0)
    {
        outputTextureUV[Position / 2] = inputTextureUV[SrcPosition / 2];
    }
}